<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>OpenAI WebRTC Audio Session</title>
  <style>
  * {
    box-sizing: border-box;
  }

  body {
    font-family: Helvetica, Arial, sans-serif;
    margin: 0;
    padding: 20px;
    background: #f5f5f5;
  }

  .container {
    max-width: 1200px;
    margin: 0 auto;
  }

  .audio-indicator {
    display: inline-block;
    width: 20px;
    height: 20px;
    border-radius: 50%;
    background: #ccc;
    margin-right: 10px;
    vertical-align: middle;
  }

  .audio-indicator.active {
    background: #4CAF50;
    animation: pulse 1s infinite;
  }

  @keyframes pulse {
    0% { opacity: 1; }
    50% { opacity: 0.5; }
    100% { opacity: 1; }
  }

  .controls {
    margin: 20px 0;
  }

  .form-group {
    margin-bottom: 15px;
  }

  label {
    display: block;
    margin-bottom: 5px;
    font-weight: bold;
  }

  input, select {
    width: 100%;
    padding: 8px;
    font-size: 16px;
    border: 1px solid #ddd;
    border-radius: 4px;
  }

  button {
    background: #007bff;
    color: white;
    border: none;
    padding: 10px 20px;
    font-size: 16px;
    border-radius: 4px;
    cursor: pointer;
  }

  button:disabled {
    background: #ccc;
    cursor: not-allowed;
  }

  .status {
    margin-top: 10px;
    padding: 10px;
    border-radius: 4px;
  }

  .error {
    background: #fee;
    color: #c00;
  }

  .success {
    background: #efe;
    color: #0a0;
  }

  .stats-container {
    display: grid;
    grid-template-columns: 1fr 1fr;
    gap: 20px;
    margin: 20px 0;
  }

  @media (max-width: 1000px) {
    .stats-container {
      grid-template-columns: 1fr;
    }
  }

  .stats-box {
    background: #fff;
    border-radius: 4px;
    padding: 20px;
    box-shadow: 0 1px 3px rgba(0,0,0,0.1);
  }

  .stats-box h2 {
    margin: 0 0 15px 0;
    padding-bottom: 10px;
    border-bottom: 1px solid #eee;
  }

  .stats-grid {
    display: grid;
    grid-template-columns: repeat(3, 1fr);
    gap: 20px;
  }

  .stats-section {
    padding: 10px;
    background: #f8f9fa;
    border-radius: 4px;
  }

  .stats-section h3 {
    margin: 0 0 10px 0;
    font-size: 1.1em;
    color: #495057;
  }

  .stats-section div {
    margin: 5px 0;
    font-size: 0.95em;
  }

  .stats-section span {
    font-weight: bold;
    color: #007bff;
  }

  .events-container {
    margin-top: 20px;
    background: #fff;
    border-radius: 4px;
    padding: 15px;
    box-shadow: 0 1px 3px rgba(0,0,0,0.1);
  }

  .event-entry {
    font-family: monospace;
    white-space: pre;
    padding: 10px;
    margin: 10px 0;
    background: #f8f9fa;
    border-radius: 4px;
    border-left: 4px solid #007bff;
  }

  .event-timestamp {
    color: #666;
    font-size: 0.9em;
    margin-bottom: 5px;
  }
  </style>
</head>
<body>
  <div class="container">
    <h1>
      <span id="audioIndicator" class="audio-indicator"></span>
      OpenAI WebRTC Audio Session
    </h1>

    <!-- Session controls: API token, voice choice, and the start/stop toggle. -->
    <div class="controls">
      <div class="form-group">
        <label for="tokenInput">OpenAI API Token</label>
        <!-- autocomplete="off": the value is an API secret, not a site login. -->
        <input type="password" id="tokenInput" autocomplete="off">
      </div>
      <div class="form-group">
        <label for="voiceSelect">Voice</label>
        <select id="voiceSelect">
          <option value="ash">Ash</option>
          <option value="ballad">Ballad</option>
          <option value="coral">Coral</option>
          <option value="sage">Sage</option>
          <option value="verse">Verse</option>
        </select>
      </div>
      <!-- Explicit type so the button never acts as a submit control. -->
      <button id="startButton" type="button">Start Session</button>
    </div>

    <!-- role="status" makes this a polite live region, so the progress and
         error messages the script writes here are announced by screen readers.
         The element exists before any text is injected, as live regions require. -->
    <div id="status" class="status" role="status"></div>
    
    <div class="stats-container">
      <div class="stats-box">
        <h2>Most recent interaction</h2>
        <div class="stats-grid">
          <div class="stats-section">
            <h3>Input tokens</h3>
            <div>Audio: <span id="currentAudioInputTokens">0</span></div>
            <div>Text: <span id="currentTextInputTokens">0</span></div>
            <div>Cached: <span id="currentCachedTokens">0</span></div>
          </div>
          <div class="stats-section">
            <h3>Output tokens</h3>
            <div>Audio: <span id="currentAudioOutputTokens">0</span></div>
            <div>Text: <span id="currentTextOutputTokens">0</span></div>
          </div>
          <div class="stats-section">
            <h3>Costs</h3>
            <div>Input cost: <span id="currentInputCost">$0.00</span></div>
            <div>Output cost: <span id="currentOutputCost">$0.00</span></div>
            <div>Total cost: <span id="currentTotalCost">$0.00</span></div>
          </div>
        </div>
      </div>

      <div class="stats-box">
        <h2>Session total</h2>
        <div class="stats-grid">
          <div class="stats-section">
            <h3>Input tokens</h3>
            <div>Audio: <span id="totalAudioInputTokens">0</span></div>
            <div>Text: <span id="totalTextInputTokens">0</span></div>
            <div>Cached: <span id="totalCachedTokens">0</span></div>
          </div>
          <div class="stats-section">
            <h3>Output tokens</h3>
            <div>Audio: <span id="totalAudioOutputTokens">0</span></div>
            <div>Text: <span id="totalTextOutputTokens">0</span></div>
          </div>
          <div class="stats-section">
            <h3>Costs</h3>
            <div>Input cost: <span id="totalInputCost">$0.00</span></div>
            <div>Output cost: <span id="totalOutputCost">$0.00</span></div>
            <div>Total cost: <span id="totalTotalCost">$0.00</span></div>
          </div>
        </div>
      </div>
    </div>

    <p>I'm not 100% confident that these cost calculations are correct.</p>

    <div class="events-container">
      <h2>Session Events</h2>
      <div id="events"></div>
    </div>
  </div>

  <script type="module">
// Cumulative token counts and costs across every response in this page session.
// The object is mutated in place by updateSessionTotals and the binding is
// never reassigned, so it is declared const.
const sessionTotals = {
  audioInputTokens: 0,   // audio input tokens, excluding cached ones
  textInputTokens: 0,    // text input tokens, excluding cached ones
  cachedInputTokens: 0,  // cached input tokens
  audioOutputTokens: 0,
  textOutputTokens: 0,
  inputCost: 0,          // costs are rendered with a "$" prefix in the UI
  outputCost: 0,
  totalCost: 0
}

/**
 * Opens a WebRTC session against the OpenAI Realtime endpoint.
 *
 * The first track of `inStream` is sent to the peer, any remote track is played
 * through a new Audio element, and an "oai-events" data channel is opened.
 * Every JSON message on that channel is appended to the on-page event log;
 * `response.done` messages that carry a `usage` object additionally update the
 * per-interaction and session-total token/cost displays.
 *
 * @param {MediaStream} inStream - Stream whose first track is sent to the peer.
 * @param {string} token - Sent as the Bearer token in the Authorization header.
 * @param {string} voice - Passed as the `voice` query parameter.
 * @returns {Promise<RTCPeerConnection>} The connection, once the remote answer
 *   has been applied.
 * @throws {Error} If negotiation fails or the HTTP response is not 2xx. The
 *   peer connection is closed before the error propagates, so nothing leaks.
 */
async function createRealtimeSession(inStream, token, voice) {
  const pc = new RTCPeerConnection()

  try {
    pc.ontrack = e => {
      const audio = new Audio()
      audio.srcObject = e.streams[0]
      // play() returns a promise that rejects when playback is blocked;
      // handle it so it does not surface as an unhandled rejection.
      audio.play().catch(err => console.error('Audio playback failed:', err))
    }

    pc.addTrack(inStream.getTracks()[0])

    const dc = pc.createDataChannel("oai-events")
    dc.addEventListener("message", (e) => {
      let eventData
      try {
        eventData = JSON.parse(e.data)
      } catch (err) {
        console.error('Error parsing event data:', err)
        return
      }

      try {
        const usage = eventData &&
          eventData.type === 'response.done' &&
          eventData.response &&
          eventData.response.usage

        if (usage) {
          // Any of the detail objects may be absent; treat missing counts as 0
          // instead of throwing and losing the event.
          const inputDetails = usage.input_token_details || {}
          const outputDetails = usage.output_token_details || {}
          const cachedDetails = inputDetails.cached_tokens_details || {}

          const currentStats = {
            // Cached tokens are reported separately, so subtract them here.
            audioInputTokens: (inputDetails.audio_tokens || 0) - (cachedDetails.audio_tokens || 0),
            textInputTokens: (inputDetails.text_tokens || 0) - (cachedDetails.text_tokens || 0),
            cachedInputTokens: inputDetails.cached_tokens || 0,
            audioOutputTokens: outputDetails.audio_tokens || 0,
            textOutputTokens: outputDetails.text_tokens || 0
          }

          const costs = calculateCosts(currentStats)

          // Update current interaction display
          updateCurrentStats(currentStats, costs)

          // Update session totals
          updateSessionTotals(currentStats, costs)
        }
      } catch (err) {
        // A stats failure must not prevent the event from being logged below.
        console.error('Error updating usage stats:', err)
      }

      addEventToLog(eventData)
    })

    const offer = await pc.createOffer()
    await pc.setLocalDescription(offer)

    const model = 'gpt-4o-realtime-preview-2024-12-17'
    const url = 'https://api.openai.com/v1/realtime' +
      `?model=${encodeURIComponent(model)}&voice=${encodeURIComponent(voice)}`

    const resp = await fetch(url, {
      method: 'POST',
      body: offer.sdp,
      headers: {
        Authorization: `Bearer ${token}`,
        'Content-Type': 'application/sdp'
      }
    })

    const body = await resp.text()
    if (!resp.ok) {
      // On failure the body is an error payload, not SDP; report it directly
      // rather than letting setRemoteDescription fail with a parse error.
      throw new Error(
        `Realtime API request failed (${resp.status} ${resp.statusText}): ${body.slice(0, 200)}`
      )
    }

    await pc.setRemoteDescription({
      type: 'answer',
      sdp: body
    })

    return pc
  } catch (err) {
    // The caller never receives `pc` on failure, so it must be closed here.
    pc.close()
    throw err
  }
}

/**
 * Converts per-category token counts into input, output and total cost.
 *
 * Each count is multiplied by a fixed per-token rate. All cached input tokens
 * are charged at a single cached rate.
 *
 * @param {Object} counts
 * @param {number} counts.audioInputTokens
 * @param {number} counts.textInputTokens
 * @param {number} counts.cachedInputTokens
 * @param {number} counts.audioOutputTokens
 * @param {number} counts.textOutputTokens
 * @returns {{inputCost: number, outputCost: number, totalCost: number}}
 */
function calculateCosts({audioInputTokens, textInputTokens, cachedInputTokens, audioOutputTokens, textOutputTokens}) {
  // Cost per single token.
  // NOTE(review): rates are hard-coded; confirm against current published pricing.
  const RATE = {
    audioIn: 0.00004,
    cachedIn: 0.0000025,
    textIn: 0.0000025,
    audioOut: 0.00008,
    textOut: 0.00001
  }

  const inputParts = [
    audioInputTokens * RATE.audioIn,
    cachedInputTokens * RATE.cachedIn,
    textInputTokens * RATE.textIn
  ]
  const outputParts = [
    audioOutputTokens * RATE.audioOut,
    textOutputTokens * RATE.textOut
  ]

  // Left-to-right sum with no seed value, so the floating-point addition
  // order is the same as writing the terms out in sequence.
  const sum = parts => parts.reduce((running, part) => running + part)

  return {
    inputCost: sum(inputParts),
    outputCost: sum(outputParts),
    totalCost: sum([...inputParts, ...outputParts])
  }
}

/**
 * Renders the "Most recent interaction" panel.
 *
 * Token counts are written with toLocaleString(); costs are written with a
 * "$" prefix and four decimal places.
 *
 * @param {Object} stats - Token counts for the latest response.
 * @param {Object} costs - inputCost / outputCost / totalCost for that response.
 */
function updateCurrentStats(stats, costs) {
  const asCount = value => value.toLocaleString()
  const asDollars = value => `$${value.toFixed(4)}`

  // [element id, source object, property, formatter] — written in this order.
  const fields = [
    ['currentAudioInputTokens', stats, 'audioInputTokens', asCount],
    ['currentTextInputTokens', stats, 'textInputTokens', asCount],
    ['currentCachedTokens', stats, 'cachedInputTokens', asCount],
    ['currentAudioOutputTokens', stats, 'audioOutputTokens', asCount],
    ['currentTextOutputTokens', stats, 'textOutputTokens', asCount],
    ['currentInputCost', costs, 'inputCost', asDollars],
    ['currentOutputCost', costs, 'outputCost', asDollars],
    ['currentTotalCost', costs, 'totalCost', asDollars]
  ]

  for (const [id, source, key, format] of fields) {
    document.getElementById(id).textContent = format(source[key])
  }
}

/**
 * Adds one interaction's token counts and costs to the running session totals,
 * then re-renders the "Session total" panel.
 *
 * @param {Object} currentStats - Token counts for the latest response.
 * @param {Object} costs - inputCost / outputCost / totalCost for that response.
 */
function updateSessionTotals(currentStats, costs) {
  const tokenKeys = [
    'audioInputTokens',
    'textInputTokens',
    'cachedInputTokens',
    'audioOutputTokens',
    'textOutputTokens'
  ]
  const costKeys = ['inputCost', 'outputCost', 'totalCost']

  // Accumulate into the shared totals object
  for (const key of tokenKeys) {
    sessionTotals[key] += currentStats[key]
  }
  for (const key of costKeys) {
    sessionTotals[key] += costs[key]
  }

  // Refresh the display: counts first, then costs
  const countTargets = [
    ['totalAudioInputTokens', 'audioInputTokens'],
    ['totalTextInputTokens', 'textInputTokens'],
    ['totalCachedTokens', 'cachedInputTokens'],
    ['totalAudioOutputTokens', 'audioOutputTokens'],
    ['totalTextOutputTokens', 'textOutputTokens']
  ]
  for (const [id, key] of countTargets) {
    document.getElementById(id).textContent = sessionTotals[key].toLocaleString()
  }

  const costTargets = [
    ['totalInputCost', 'inputCost'],
    ['totalOutputCost', 'outputCost'],
    ['totalTotalCost', 'totalCost']
  ]
  for (const [id, key] of costTargets) {
    document.getElementById(id).textContent = `$${sessionTotals[key].toFixed(4)}`
  }
}

/**
 * Prepends one event to the on-page log as a timestamped, pretty-printed
 * JSON entry, keeping at most 50 entries.
 *
 * @param {*} eventData - Parsed event; serialized with two-space indentation.
 */
function addEventToLog(eventData) {
  const log = document.getElementById('events')

  const stamp = document.createElement('div')
  stamp.className = 'event-timestamp'
  stamp.textContent = new Date().toISOString()

  const payload = document.createElement('div')
  payload.textContent = JSON.stringify(eventData, null, 2)

  const entry = document.createElement('div')
  entry.className = 'event-entry'
  entry.append(stamp, payload)

  // Newest entry goes first
  log.prepend(entry)

  // Trim the oldest entries so the DOM does not grow without bound
  while (log.children.length > 50) {
    log.removeChild(log.lastChild)
  }
}

    // DOM handles shared by the session lifecycle functions below
    const startButton = document.querySelector('#startButton')
    const tokenInput = document.querySelector('#tokenInput')
    const voiceSelect = document.querySelector('#voiceSelect')
    const status = document.querySelector('#status')
    const audioIndicator = document.querySelector('#audioIndicator')

    // Live session resources; each is null while no session is active
    let peerConnection = null,
        audioContext = null,
        audioStream = null

    // Pre-fill the token field with the key saved by a previous session, if any
    document.addEventListener('DOMContentLoaded', function restoreSavedToken() {
      const stored = localStorage.getItem('openai_api_key')
      if (!stored) return
      tokenInput.value = stored
    })

    // Audio visualization
    /**
     * Drives the header indicator from the level of `stream`.
     *
     * Creates an AudioContext (stored in the shared `audioContext` variable so
     * stopSession can close it), feeds the stream into an analyser, and on
     * every animation frame toggles the indicator's "active" class when the
     * average frequency-bin value exceeds 30.
     *
     * @param {MediaStream} stream - Stream to monitor.
     */
    function setupAudioVisualization(stream) {
      // Keep a local reference: the loop below must follow THIS context, not
      // whatever the shared variable points at later.
      const context = new AudioContext()
      audioContext = context

      const source = context.createMediaStreamSource(stream)
      const analyzer = context.createAnalyser()
      analyzer.fftSize = 256

      source.connect(analyzer)

      const bufferLength = analyzer.frequencyBinCount
      const dataArray = new Uint8Array(bufferLength)

      function updateIndicator() {
        // Stop once this session's context was torn down (null) or replaced by
        // a newer session; otherwise a stale loop would keep running and fight
        // the new loop over the indicator.
        if (audioContext !== context) return

        analyzer.getByteFrequencyData(dataArray)
        const average = dataArray.reduce((sum, level) => sum + level, 0) / bufferLength

        audioIndicator.classList.toggle('active', average > 30)
        requestAnimationFrame(updateIndicator)
      }

      updateIndicator()
    }

    /**
     * Starts a session: saves the token, acquires the microphone, starts the
     * level indicator, and opens the realtime connection.
     *
     * Progress and errors are written to the status element. On any failure
     * the partially created resources are released via stopSession().
     * The start button is disabled while the attempt is in flight so a second
     * click cannot launch a parallel session whose stream and connection would
     * be orphaned (peerConnection is still null during that window).
     *
     * @returns {Promise<void>} Resolves when the attempt has finished; never rejects.
     */
    async function startSession() {
      startButton.disabled = true

      try {
        // Save API key to localStorage
        localStorage.setItem('openai_api_key', tokenInput.value)

        status.className = 'status'
        status.textContent = 'Requesting microphone access...'

        audioStream = await navigator.mediaDevices.getUserMedia({
          audio: true,
          video: false
        })

        setupAudioVisualization(audioStream)

        status.textContent = 'Establishing connection...'

        peerConnection = await createRealtimeSession(
          audioStream,
          tokenInput.value,
          voiceSelect.value
        )

        status.className = 'status success'
        status.textContent = 'Session established successfully!'
        startButton.textContent = 'Stop Session'

      } catch (err) {
        status.className = 'status error'
        status.textContent = `Error: ${err.message}`
        console.error('Session error:', err)
        stopSession()
      } finally {
        // Re-enable whether we ended up connected (button now stops) or failed
        startButton.disabled = false
      }
    }

    /**
     * Releases whatever session resources currently exist and resets the UI.
     *
     * Safe to call when nothing is running: each resource is only touched if
     * it is set, and is cleared afterwards.
     */
    function stopSession() {
      // Peer connection
      if (peerConnection) {
        peerConnection.close()
        peerConnection = null
      }

      // Analyser context; clearing the variable also ends the indicator loop
      if (audioContext) {
        audioContext.close()
        audioContext = null
      }

      // Microphone tracks
      if (audioStream) {
        for (const track of audioStream.getTracks()) {
          track.stop()
        }
        audioStream = null
      }

      startButton.textContent = 'Start Session'
      audioIndicator.classList.remove('active')
    }

    // The single button toggles: stop when a session exists, otherwise start
    startButton.addEventListener('click', () => {
      if (peerConnection) {
        stopSession()
        return
      }

      // Refuse to start without a token
      if (!tokenInput.value) {
        status.className = 'status error'
        status.textContent = 'Please enter an API token'
        return
      }

      startSession()
    })

    // Release the microphone and connection when the page is leaving
    window.addEventListener('beforeunload', stopSession)
  </script>
</body>
</html>
